
> ## ---- Functions ----
> extract_qoi_med <- function(x, m, t) {
+   tibble(
+     qoi = "me",
+     est = x$d.avg,
+     se = sd(x$d.avg.sims),
+     lower = x$d.avg.ci[1],
+     upper = x$d.avg.ci[2],
+     p = x$d.avg.p
+   ) %>%
+     dplyr::bind_rows(
+       tibble(
+         qoi = "de",
+         est = x$z.avg,
+         se = sd(x$z.avg.sims),
+         lower = x$z.avg.ci[1],
+         upper = x$z.avg.ci[2],
+         p = x$z.avg.p
+       )
+     ) %>%
+     dplyr::bind_rows(
+       tibble(
+         qoi = "te",
+         est = x$tau.coef,
+         se = sd(x$tau.sims),
+         lower = x$tau.ci[1],
+         upper = x$tau.ci[2],
+         p = x$tau.p
+       )
+     )  %>%
+     dplyr::bind_rows(
+       tibble(
+         qoi = "prop",
+         est = x$n.avg,
+         se = sd(x$n.avg.sims),
+         lower = x$n.avg.ci[1],
+         upper = x$n.avg.ci[2],
+         p = x$n.avg.p
+       )
+     ) %>%
+     dplyr::mutate(imp = m,
+                   tertile = t)
+ }

> extract_qoi_sens <- function(x, m, t) {
+   tibble(
+     qoi = "me",
+     rho = x$rho,
+     est = x$d0,
+     lower = x$lower.d0,
+     upper = x$upper.d0
+   ) %>%
+     dplyr::bind_rows(tibble(
+       qoi = "de",
+       rho = x$rho,
+       est = x$z0,
+       lower = x$lower.z0,
+       upper = x$upper.z0
+     )) %>%
+     dplyr::mutate(
+       r2_m = x$r.square.m,
+       r2_y = x$r.square.y,
+       imp = m,
+       tertile = t
+     )
+ }

> ## ---- Data ----
> ## SOEP data
> load("dat/proc-data/soep_imp.RData")

> ## ---- Shortened TSCS: AfD analyses, 2014-2018 (demeaning) ----
> ## Variable selection for demeaning
> dm_vars <- c(
+   "afd",
+   "cmr_arm",
+   "cold_rent_sqm",
+   "log_hinc_eq",
+   "prop_personal_hinc",
+   "hh_prop_ecact",
+   "hh_mmb",
+   "lm_part_Atypical",
+   "lm_part_Inactive",
+   "lm_part_Unemployed",
+   "lm_part_In Education",
+   "lm_part_Pensioners",
+   "y14",
+   "y15",
+   "y16",
+   "y17",
+   "y18"
+ )

> ## Updated regional data
> regional_data <- rio::import("../../soep-data/soep.v38/stata/regionl.dta") %>%
+   dplyr::filter(kkz > 0) %>%
+   dplyr::select(kkz,
+                 syear,
+                 kr_uemprate,
+                 kr_foreigner) %>%
+   dplyr::rename(year = syear) %>%
+   dplyr::mutate_at(
+     .vars = vars(kr_uemprate,
+                  kr_foreigner),
+     .funs = ~ ifelse(. < 0, NA_real_, .)
+   ) %>%
+   dplyr::distinct() %>%
+   dplyr::group_by(kkz, year) %>%
+   dplyr::summarize_all(mean) %>%
+   dplyr::ungroup()

> ## Data management
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     dplyr::mutate(y14 = as.numeric(year == 2014),
+                   y15 = as.numeric(year == 2015),
+                   y16 = as.numeric(year == 2016),
+                   y17 = as.numeric(year == 2017),
+                   y18 = as.numeric(year == 2018)) %>%
+     dplyr::mutate(
+       yrs_at_current_address = year - hh_at_current_address,
+       age_at_current_address = age - yrs_at_current_address
+     ) %>%
+     dplyr::mutate(
+       ltr_since_3 = as.numeric(yrs_at_current_address >= 3),
+       ltr_since_5 = as.numeric(yrs_at_current_address >= 5)
+     ) %>%
+     filter(renter_no_rent == 0) %>%
+     dplyr::select(
+       id,
+       hh_id,
+       plz,
+       kkz,
+       year,
+       year_fac,
+       weight,
+       starts_with("cold_rent_load"),
+       starts_with("cold_rent_sqm"),
+       wr_ecego,
+       wr_immig,
+       afd,
+       vote_afd,
+       owner,
+       all_of(dm_vars),
+       starts_with("rent"),
+       starts_with("cmr_arm"),
+       starts_with("log_hinc_eq"),
+       starts_with("gtyp"),
+       starts_with("prop_personal_hinc"),
+       starts_with("hh_prop_ecact"),
+       starts_with("hh_mmb"),
+       starts_with("mover"),
+       starts_with("lm_part"),
+       starts_with("edu"),
+       starts_with("age"),
+       starts_with("east"),
+       starts_with("fem"),
+       starts_with("east"),
+       starts_with("ltr_"),
+       social_housing
+     ) %>%
+     dplyr::filter(not(is.na(rent_umn))) %>%
+     dplyr::filter(not(is.na(rent_cwu))) %>%
+     dplyr::filter(not(is.na(cmr_arm_umn))) %>%
+     dplyr::filter(not(is.na(cmr_arm_cwu)))
+ }

> ## Local market data
> load("dat/proc-data/plz5_f_und_b.RData")

> plz5_f_und_b <- plz5_f_und_b %>%
+   st_set_geometry(NULL) %>%
+   filter(year %in% 2005:2018) %>%
+   dplyr::select(plz, year, cmr_arm, rent) %>%
+   distinct() %>%
+   group_by(plz) %>%
+   mutate(
+     cmr_arm_2005 = mean(cmr_arm[year %in% 2005:2007], na.rm = TRUE),
+     cmr_arm_2018 = mean(cmr_arm[year %in% 2016:2018], na.rm = TRUE),
+     cmr_arm_chg = cmr_arm_2018 - cmr_arm_2005
+   ) %>%
+   mutate(
+     rent_2005 = mean(rent[year %in% 2005:2007], na.rm = TRUE),
+     rent_2018 = mean(rent[year %in% 2016:2018], na.rm = TRUE),
+     rent_chg = rent_2018 - rent_2005,
+     cmr_arm_chg = ifelse(is.na(cmr_arm_chg), rent_chg, cmr_arm_chg),
+     cmr_arm = ifelse(is.na(cmr_arm), rent, cmr_arm)
+   ) %>%
+   ungroup() %>%
+   group_by(year) %>%
+   mutate(
+     rent_cat =
+       ifelse(
+         year == 2004,
+         NA_integer_,
+         cut(
+           rent,
+           breaks = quantile(rent, c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
+           labels = paste0("T", 1:3)
+         )
+       ),
+     cmr_arm_cat =
+       cut(
+         cmr_arm,
+         breaks = quantile(cmr_arm, c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
+         labels = paste0("T", 1:3)
+       )
+   ) %>%
+   ungroup() %>%
+   dplyr::select(plz, cmr_arm_chg, rent_chg, rent_cat, cmr_arm_cat, year) %>%
+   distinct() %>%
+   na.omit() %>%
+   mutate(
+     cmr_arm_chg_cat =
+       cut(
+         cmr_arm_chg,
+         breaks = quantile(cmr_arm_chg, c(0, 1 / 3, 2 / 3, 1)),
+         labels = paste0("T", 1:3)
+       ),
+     rent_chg_cat =
+       cut(
+         rent_chg,
+         breaks = quantile(rent_chg, c(0, 1 / 3, 2 / 3, 1)),
+         labels = paste0("T", 1:3)
+       )
+   )

> ## Additional data management
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     # top code household rents per sqm
+     mutate(cold_rent_sqm = case_when(cold_rent_sqm > 40 ~ 40,
+                                      TRUE ~ cold_rent_sqm)) %>%
+     group_by(year) %>%
+     mutate(log_hinc_eq_cat =
+              cut(
+                log_hinc_eq,
+                breaks = quantile(log_hinc_eq, c(0, 1 / 3, 2 / 3, 1)),
+                labels = paste0("T", 1:3)
+              )) %>%
+     ungroup() %>%
+     mutate(
+       gtyp4 = case_when(
+         gtyp %in% c(
+           "[1] Kernstaedte>==gr.Verdraum",
+           "[2] Kernstaedte<==gr.Verdraum",
+           "[9] Kernstaedte<==Verdansatz"
+         ) ~ "urban",
+         gtyp %in% c(
+           "[7] Mi-zent.=laendl=gr.Verdraum",
+           "[10] Mi-zent.=verd.=Verdansatz",
+           "[12] Mi-zent.=laendl=Verdansatz",
+           "[14] Mi-zent.=verd.=laendl.",
+           "[16] Mi-zent.=laendl=laendl."
+         ) ~ "towns",
+         gtyp %in% c(
+           "[3] Mi-zent.=hochverd.=gr.Verdraum",
+           "[5] Mi-zent.=verd.=gr.Verdraum",
+           "[4] so.Gem.=hochverd.=gr.Verdraum",
+           "[6] so.Gem.=verd.=gr.Verdraum"
+         ) ~ "suburban",
+         gtyp %in% c(
+           "[8] so.Gem.=laendl=gr.Verdraum",
+           "[11] so.Gem.=verd.=Verdansatz",
+           "[13] so.Gem.=laendl=Verdansatz",
+           "[15] so.Gem.=verd.=laendl.",
+           "[17] so.Gem.=laendl=laendl."
+         ) ~ "rural",
+         TRUE ~ NA_character_
+       ),
+       gtyp3 = ifelse(gtyp4 == "towns", "rural", gtyp4)
+     ) %>%
+     left_join(plz5_f_und_b,
+               by = c("plz", "year")) %>%
+     dplyr::filter(weight > 0)
+   
+   ## Repair names
+   names(soep_imp$imputations[[m]]) <-
+     gsub(" ", "_", names(soep_imp$imputations[[m]]))
+ }

> ## Join county-level data, within-demeaning
> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     dplyr::select(-dplyr::starts_with("kr_")) %>%
+     dplyr::left_join(regional_data,
+                      by = c("kkz", "year")) %>%
+     dplyr::select(-ends_with("_cwu"),-ends_with("_cwu"))
+ }

> ## 2014:2018 Data for AfD support analyses
> dm_vars <- gsub(" ", "_", dm_vars)

> soep_afd <- soep_imp

> soep_afd$imputations <-
+   lapply(soep_afd$imputations, function (imp) {
+     imp %>%
+       dplyr::filter(year >= 2014) %>%
+       dplyr::select(-dplyr::ends_with("_cwu")) %>%
+       dplyr::select(-dplyr::ends_with("_umn")) %>%
+       group_by(id) %>%
+       mutate(log_hinc_eq_umn = mean(log_hinc_eq, na.rm = T)) %>%
+       mutate(
+         cmr_arm_cat = get_mode(cmr_arm_cat)[1],
+         cmr_arm_chg_cat = get_mode(cmr_arm_chg_cat)[1]
+       ) %>%
+       ungroup() %>%
+       mutate(log_hinc_eq_cat =
+                cut(
+                  log_hinc_eq_umn,
+                  breaks = quantile(log_hinc_eq_umn, c(0, 1 / 3, 2 / 3, 1)),
+                  labels = paste0("T", 1:3)
+                ))
+   })

> ## ---- Estimation -----
> dir.create("est/mediation")

> ## Set up cluster for parallelized computation across imputations
> cl <- makeCluster(length(soep_afd$imputations),
+                   outfile = "est/mediation.txt",
+                   type = "FORK")

> ## Marginal effects
> med <- parLapply(cl,
+                  seq_along(soep_afd$imputations),
+                  function (m) {
+                    ## ---- Seed ----
+                    set.seed(20240503L)
+                    
+                    ## ---- T1 ----
+                    dat1 <- soep_afd$imputations[[m]] %>%
+                      filter(owner == 0) %>%
+                      filter(log_hinc_eq_cat == "T1") %>%
+                      filter(ltr_since_5 == 1) %>%
+                      group_by(id) %>%
+                      mutate_at(.vars = vars(all_of(dm_vars)),
+                                .funs = list(
+                                  umn = ~ mean(., na.rm = T),
+                                  cwu = ~ . - mean(., na.rm = T)
+                                )) %>%
+                      ungroup()
+                    
+                    # Estimate models
+                    mod1_1 <- lm(
+                      afd_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        + year_fac,
+                      data = dat1,
+                      weights = weight
+                    )
+                    
+                    mod2_1 <- lm(
+                      cold_rent_sqm_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        + year_fac,
+                      data = dat1,
+                      weights = weight
+                    )
+                    
+                    mod3_1 <- lm(
+                      afd_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        year_fac +
+                        cold_rent_sqm_cwu,
+                      data = dat1,
+                      weights = weight
+                    )
+                    
+                    ## ---- Adjust df ----
+                    df_adj <- length(unique(dat1$id)) - 1L
+                    mod1_1$df.residual <- mod1_1$df.residual - df_adj
+                    mod2_1$df.residual <- mod2_1$df.residual - df_adj
+                    mod3_1$df.residual <- mod3_1$df.residual - df_adj
+ 
+                    ## ---- T2 ----
+                    dat2 <- soep_afd$imputations[[m]] %>%
+                      filter(owner == 0) %>%
+                      filter(log_hinc_eq_cat == "T2") %>%
+                      filter(ltr_since_5 == 1) %>%
+                      group_by(id) %>%
+                      mutate_at(.vars = vars(all_of(dm_vars)),
+                                .funs = list(
+                                  umn = ~ mean(., na.rm = T),
+                                  cwu = ~ . - mean(., na.rm = T)
+                                )) %>%
+                      ungroup()
+                    
+                    # Estimate models
+                    mod1_2 <- lm(
+                      afd_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        + year_fac,
+                      data = dat2,
+                      weights = weight
+                    )
+                    
+                    mod2_2 <- lm(
+                      cold_rent_sqm_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        + year_fac,
+                      data = dat2,
+                      weights = weight
+                    )
+                    
+                    mod3_2 <- lm(
+                      afd_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        year_fac +
+                        cold_rent_sqm_cwu,
+                      data = dat2,
+                      weights = weight
+                    )
+                    
+                    ## ---- Adjust df ----
+                    df_adj <- length(unique(dat2$id)) - 1L
+                    mod1_2$df.residual <- mod1_2$df.residual - df_adj
+                    mod2_2$df.residual <- mod2_2$df.residual - df_adj
+                    mod3_2$df.residual <- mod3_2$df.residual - df_adj
+                    
+                    ## ---- T3 ----
+                    dat3 <- soep_afd$imputations[[m]] %>%
+                      filter(owner == 0) %>%
+                      filter(log_hinc_eq_cat == "T3") %>%
+                      filter(ltr_since_5 == 1) %>%
+                      group_by(id) %>%
+                      mutate_at(.vars = vars(all_of(dm_vars)),
+                                .funs = list(
+                                  umn = ~ mean(., na.rm = T),
+                                  cwu = ~ . - mean(., na.rm = T)
+                                )) %>%
+                      ungroup()
+                    
+                    # Estimate models
+                    mod1_3 <- lm(
+                      afd_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        + year_fac,
+                      data = dat3,
+                      weights = weight
+                    )
+                    
+                    mod2_3 <- lm(
+                      cold_rent_sqm_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        + year_fac,
+                      data = dat3,
+                      weights = weight
+                    )
+                    
+                    mod3_3 <- lm(
+                      afd_cwu ~
+                        cmr_arm_cwu +
+                        log_hinc_eq_cwu +
+                        prop_personal_hinc_cwu +
+                        hh_prop_ecact_cwu +
+                        hh_mmb_cwu +
+                        lm_part_Atypical_cwu +
+                        lm_part_Inactive_cwu +
+                        lm_part_Unemployed_cwu +
+                        lm_part_In_Education_cwu +
+                        lm_part_Pensioners_cwu +
+                        year_fac +
+                        cold_rent_sqm_cwu,
+                      data = dat3,
+                      weights = weight
+                    )
+                    
+                    ## ---- Adjust df for within-adjustment ----
+                    df_adj <- length(unique(dat3$id)) - 1L
+                    mod1_3$df.residual <- mod1_3$df.residual - df_adj
+                    mod2_3$df.residual <- mod2_3$df.residual - df_adj
+                    mod3_3$df.residual <- mod3_3$df.residual - df_adj
+ 
+                    ## ---- Mediation analyses ----
+                    ## Analyses
+                    med1 <- mediation::mediate(
+                      model.m = mod2_1,
+                      model.y = mod3_1,
+                      treat = "cmr_arm_cwu",
+                      mediator = "cold_rent_sqm_cwu"
+                    )
+                    
+                    med2 <- mediation::mediate(
+                      model.m = mod2_2,
+                      model.y = mod3_2,
+                      treat = "cmr_arm_cwu",
+                      mediator = "cold_rent_sqm_cwu"
+                    )
+                    
+                    med3 <- mediation::mediate(
+                      model.m = mod2_3,
+                      model.y = mod3_3,
+                      treat = "cmr_arm_cwu",
+                      mediator = "cold_rent_sqm_cwu"
+                    )
+                    
+                    ## ---- Sensitivity analyses ----
+                    sens1 <- mediation::medsens(med1,
+                                                rho.by = 0.025,
+                                                effect.type = "both")
+                    
+                    sens2 <- mediation::medsens(med2,
+                                                rho.by = 0.025,
+                                                effect.type = "both")
+                    
+                    sens3 <- mediation::medsens(med3,
+                                                rho.by = 0.025,
+                                                effect.type = "both")
+                    
+                    ## ---- Extraction ----
+                    out_med <- extract_qoi_med(med1, m, 1) %>%
+                      dplyr::bind_rows(extract_qoi_med(med2, m, 2)) %>%
+                      dplyr::bind_rows(extract_qoi_med(med3, m, 3))
+                    
+                    out_sens <- extract_qoi_sens(sens1, m, 1) %>%
+                      dplyr::bind_rows(extract_qoi_sens(sens2, m, 2)) %>%
+                      dplyr::bind_rows(extract_qoi_sens(sens3, m, 3))
+                    
+                    ## ---- Return value ----
+                    return(
+                      list(
+                        med = out_med,
+                        sens = out_sens
+                      )
+                    )
+                  })

> ## Exit parallel computation
> stopCluster(cl)

> ## ---- Combine estimates ----
> med_sum <- lapply(med, function(x) x$med) %>%
+   dplyr::bind_rows() %>%
+   dplyr::arrange(tertile, qoi, imp)

> sens_sum <- lapply(med, function(x) x$sens) %>%
+   dplyr::bind_rows() %>%
+   dplyr::arrange(tertile, qoi, imp, rho)

> ## ---- Export as CSV ----
> med_sum %>%
+   write.csv(paste0("est/mediation_results.csv"))

> sens_sum %>%
+   write.csv(paste0("est/mediation_sensitivity.csv"))
